Importing important libraries

In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

Loading the training dataset

For demonstration purposes, let us take a 2-dimensional dataset with two features (Feature_1 and Feature_2), consisting of two classes (Class A and Class B) with the following distribution specifications:

Class A: The Class A is centred around the mean of (1,1) and has the covariance matrix [[1,-0.2],[-0.2,1]]

Class B: The Class B is centred around the mean of (3,4) and has the covariance matrix [[1,0.1],[0.1,1]]

Definitions:

Mean: A Class with centre (x1, x2) as mean denotes that the average value along "Feature_1" is x1 and the average value along "Feature_2" is x2

P.S.: To ensure that the outputs correspond to the expected outputs, we also set the random seed to 42 before generating these distributions.

In [2]:
# Distribution parameters of the two Gaussian classes (mean vector, covariance).
mean_01 = np.array([1, 1])
cov_01 = np.array([[1, -0.2], [-0.2, 1]])
mean_02 = np.array([3, 4])
cov_02 = np.array([[1, 0.1], [0.1, 1]])

# Seed NumPy's global RNG so the samples are repeatable; Class A is drawn
# first and Class B second, which fixes the order the random stream is used.
np.random.seed(42)
data_01, data_02 = (
    np.random.multivariate_normal(mean, cov, 500, check_valid="warn")
    for mean, cov in ((mean_01, cov_01), (mean_02, cov_02))
)

# Stack Class A on top of Class B and label the rows 0 (A) / 1 (B).
data = np.concatenate((data_01, data_02), axis=0)
df_train = pd.DataFrame(data, columns=["Feature_1", "Feature_2"]).assign(
    **{"class": [0] * 500 + [1] * 500}
)

Visualising the dataset

In [3]:
# Empty 800x800 canvas with labelled axes; the class traces are attached below.
fig = go.Figure(
    layout={
        "width": 800,
        "height": 800,
        "title_text": "Visualization of the dataset",
        "xaxis": {"title": {"text": "Feature_1"}},
        "yaxis": {"title": {"text": "Feature_2"}},
    }
)

# The first 500 rows of df_train are plotted as Class A, the rest as Class B.
class_a_rows = df_train.iloc[:500]
class_b_rows = df_train.iloc[500:]

# Class A: light-blue circles with a blue outline.
scatter_trace_1 = go.Scatter(
    x=class_a_rows["Feature_1"],
    y=class_a_rows["Feature_2"],
    mode="markers",
    name="Class A",
    hovertemplate="Feature_1: %{x}<br>Feature_2: %{y}",
    marker={
        "size": 9,
        "opacity": 0.80,
        "color": "lightblue",
        "line": {"color": "blue", "width": 1},
    },
)

# Class B: orange upward star-triangles with a red outline.
scatter_trace_2 = go.Scatter(
    x=class_b_rows["Feature_1"],
    y=class_b_rows["Feature_2"],
    mode="markers",
    name="Class B",
    hovertemplate="Feature_1: %{x}<br>Feature_2: %{y}",
    marker={
        "symbol": "star-triangle-up",
        "size": 10,
        "opacity": 0.65,
        "color": "darkorange",
        "line": {"color": "red", "width": 1},
    },
)

# Both traces are kept as module-level names because later cells reuse them.
for class_trace in (scatter_trace_1, scatter_trace_2):
    fig.add_trace(class_trace)
fig.show()
# fig.write_html(r".\expected outputs\expectedoutput1.html")

Machine Learning Model

Preparing training and test sets

In [4]:
# Feature values as a plain array, labels kept as a single-column frame.
features = df_train[["Feature_1", "Feature_2"]].to_numpy()
Y = df_train[["class"]]

# Prepend a column of ones so that theta[0] acts as the intercept term.
# The column is sized from the data instead of a hard-coded 1000, so the cell
# keeps working if the number of generated samples changes.
X = np.hstack((np.ones((features.shape[0], 1)), features))

# 80/20 split with a fixed random_state for a repeatable partition.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y.to_numpy(), test_size=0.2, random_state=42
)
print(X_train.shape,Y_train.shape,X_test.shape,Y_test.shape)
(800, 3) (800, 1) (200, 3) (200, 1)

Defining the model

In [5]:
def hypothesis(x,theta):
    """Logistic-regression hypothesis: the sigmoid of the linear score.

    Args:
        x: A sample (or anything ``np.dot`` can multiply with ``theta``).
        theta: Parameter vector.

    Returns:
        ``1 / (1 + exp(-x . theta))``, with the shape ``np.dot`` produces.
    """
    linear_score = np.dot(x, theta)
    return 1.0 / (1.0 + np.exp(-1.0 * linear_score))

def error(X,Y,theta):
    """Mean negative log-likelihood (base-2 logarithm) of the labels.

    Args:
        X: Sample matrix; one row per sample.
        Y: Labels, indexed per sample as ``Y[i]``.
        theta: Parameter vector passed on to ``hypothesis``.

    Returns:
        The negated average of ``y*log2(h) + (1-y)*log2(1-h)`` over all rows
        of ``X``. The result has whatever shape ``Y[i]`` has.
    """
    n_samples = X.shape[0]
    log_likelihood = 0
    for idx in range(n_samples):
        prob = hypothesis(X[idx], theta)
        label = Y[idx]
        # Log-probability assigned to the true label of this sample.
        sample_term = label * np.log2(prob) + (1 - label) * np.log2(1 - prob)
        log_likelihood = log_likelihood + sample_term
    return -(log_likelihood / n_samples)

def gradient(X,Y,theta):
    """Gradient of the mean logistic error with respect to ``theta``.

    Computes ``(1/m) * sum_i (h(x_i) - y_i) * x_i`` in one vectorised step.

    The previous element-wise loop stored ``(hx - Y[i]) * X[i, j]`` into a
    scalar slot of ``grad``. When ``Y`` is a column vector of shape (m, 1)
    that value is a size-1 array, and assigning it to a scalar element is
    deprecated since NumPy 1.25. Flattening the labels avoids that and removes
    the Python double loop.

    Args:
        X: Sample matrix of shape (m, n_features).
        Y: Labels, either shape (m,) or (m, 1).
        theta: Parameter vector of length n_features.

    Returns:
        1-D array of length n_features.
    """
    X = np.asarray(X, dtype=float)
    # Flatten so (m, 1) labels subtract element-wise instead of broadcasting.
    labels = np.asarray(Y, dtype=float).reshape(-1)
    residual = hypothesis(X, theta) - labels
    return np.dot(X.T, residual) / X.shape[0]
    
def gradient_ascent(X,Y,learning_rate=0.5,n_iterations=100):
    """Fit the logistic-regression parameters iteratively.

    Despite the name, each step subtracts ``learning_rate * gradient`` of the
    error, i.e. it is gradient descent on the error returned by ``error``.

    The initial parameters are drawn from NumPy's global random state (with
    the intercept forced to 0), so results depend on the seed set earlier.

    Args:
        X: Training samples, one row per sample.
        Y: Training labels.
        learning_rate: Step size of each update.
        n_iterations: Number of update steps (defaults to the previously
            hard-coded 100).

    Returns:
        Tuple ``(theta, theta_list, error_list, acc_list, probabilty_list)``.
        The three history lists hold one entry per iteration, each recorded
        BEFORE that iteration's update is applied. ``probabilty_list`` is the
        output of ``predic_proba`` for the final parameters.
    """
    theta = 2 * np.random.random(X.shape[1])
    theta[0] = 0
    error_list = []
    acc_list = []
    theta_list = []
    for _ in range(n_iterations):
        grad = gradient(X, Y, theta)
        error_list.append(error(X, Y, theta))
        acc_list.append(accuracy(X, Y, theta))
        # Copy: theta is updated in place below.
        theta_list.append(theta.copy())
        theta -= learning_rate * grad
    probabilty_list = predic_proba(X, theta)
    return(theta, theta_list, error_list, acc_list, probabilty_list)

def predict(x,theta):
    """Classify one sample: 0 if its hypothesis value is below 0.5, else 1."""
    return 0 if hypothesis(x, theta) < 0.5 else 1

def predic_proba(x,theta):
    """Return the hypothesis value for every row of ``x``.

    The previous version ignored its ``x`` argument and iterated over the
    module-level ``X`` instead, so it always scored the full dataset no matter
    which samples were passed in.

    Args:
        x: Sample matrix; one row per sample.
        theta: Parameter vector passed on to ``hypothesis``.

    Returns:
        List with one hypothesis value per row of ``x``.
    """
    samples = np.asarray(x)
    return [hypothesis(sample, theta) for sample in samples]


def accuracy(X,Y,theta):
    """Fraction of rows of ``X`` whose predicted class equals the label.

    Labels and predictions are both flattened before comparison. Previously
    the predictions were reshaped to (m, 1) and compared with ``Y`` as given,
    so a 1-D ``Y`` of shape (m,) broadcast into an (m, m) comparison and
    produced a meaningless count.

    Args:
        X: Sample matrix; one row per sample.
        Y: Labels, shape (m,) or (m, 1).
        theta: Parameter vector passed on to ``predict``.

    Returns:
        Number of matching predictions divided by the number of rows in ``X``.
    """
    n_samples = X.shape[0]
    y_pred = np.array([predict(X[i], theta) for i in range(n_samples)])
    y_true = np.asarray(Y).reshape(-1)
    return (y_true == y_pred).sum() / n_samples

Training the model

In [6]:
# Fit on the training split with the default learning rate; keep the final
# parameters plus the per-iteration histories used by the plots below.
(
    theta,
    theta_list,
    error_list,
    acc_list,
    probabilty_list,
) = gradient_ascent(X_train, Y_train)

Visualising Error over training set

In [7]:
# 800x800 canvas for the training-error curve.
fig = go.Figure(
    layout={
        "width": 800,
        "height": 800,
        "title_text": "Visualising the error",
        "xaxis": {"title": {"text": "Iteration"}},
        "yaxis": {"title": {"text": "Error (Negative of maximum likelihood)"}},
    }
)

# Iterations are numbered from 1; element 0 of each error_list entry is the
# value that gets plotted.
iteration_numbers = list(range(1, 101))
error_values = [entry[0] for entry in error_list]

error_trace = go.Scatter(
    x=iteration_numbers,
    y=error_values,
    mode="lines+markers",
    name="",
    marker={
        "color": "lightblue",
        "line": {"color": "blue", "width": 1},
    },
    hovertemplate="Iteration: %{x}<br>Error: %{y}",
)
fig.add_trace(error_trace)

fig.show()
# fig.write_html(r".\expected outputs\expectedoutput2.html")

Visualising accuracy over training set

In [8]:
# 800x800 canvas for the accuracy curve recorded during training.
fig = go.Figure(
    layout={
        "width": 800,
        "height": 800,
        "title_text": "Visualising the Accuracy",
        "xaxis": {"title": {"text": "Iteration"}},
        "yaxis": {"title": {"text": "Accuracy"}},
    }
)

# Iterations are numbered from 1 on the x-axis.
iteration_numbers = list(range(1, 101))

accuracy_trace = go.Scatter(
    x=iteration_numbers,
    y=acc_list,
    mode="lines+markers",
    name="",
    marker={
        "color": "lightblue",
        "line": {"color": "blue", "width": 1},
    },
    hovertemplate="Iteration: %{x}<br>Accuracy: %{y}",
)
fig.add_trace(accuracy_trace)

fig.show()
# fig.write_html(r".\expected outputs\expectedoutput3.html")
In [9]:
# Last recorded accuracy (acc_list is filled by the training loop) and the
# fitted parameter vector.
last_recorded_accuracy = acc_list[-1]
print("The accuracy for the algorithm is:", last_recorded_accuracy)
print("The final theta parameters calculated are:", theta)
The accuracy for the algorithm is: 0.96375
The final theta parameters calculated are: [-3.8480557   0.60018874  1.22204899]

Visualising the decision boundary over iterations

In [10]:
def _boundary_trace(params, x_min=-4, x_max=8):
    """Line trace of the decision boundary for one parameter vector.

    The boundary is where params[0] + params[1]*Feature_1 + params[2]*Feature_2
    equals 0. Feature_2 is solved for at the SAME Feature_1 values that are
    used as the x-coordinates. Previously x came from linspace(-3, 7) while y
    was computed from linspace(-4, 8), which drew a line with the wrong slope.
    """
    x_vals = np.linspace(x_min, x_max, 2)
    y_vals = -1 * (params[0] + x_vals * params[1]) / params[2]
    return go.Scatter(
        x = x_vals,
        y = y_vals,
        mode = "lines",
        name = "Decision Boundry",
        hoverinfo = "none"
    )


# Slider shown under the plot; its steps are filled in by the loop below.
sliders_dict = {
    'active': 0,
    'yanchor': 'top',
    'xanchor': 'left',
    'currentvalue': {
        'font': {'size': 20},
        'prefix': 'No. of iterations:',
        'visible': True,
        'xanchor': 'right'
    },
    'transition': {'duration': 300, 'easing': 'linear'},
    'pad': {'b': 10, 't': 50},
    'len': 0.9,
    'x': 0.1,
    'y': 0,
    'steps': []
}

# One animation frame and one slider step per recorded parameter vector
# (taken from theta_list rather than a hard-coded count of 100).
frames = []
for i in range(len(theta_list)):
    frames.append(go.Frame(
        data = [scatter_trace_1, scatter_trace_2, _boundary_trace(theta_list[i])],
        name = str(i+1)
    ))
    sliders_dict['steps'].append({
        'args': [
            [i+1],{
                'frame': {'duration': 300, 'redraw': True},
                'mode': 'immediate',
                'transition': {'duration': 300}
            }],
        'label': i+1,
        'method': 'animate'
    })

# Initial view shows the boundary of the first recorded parameters; the
# Play/Pause buttons drive the frame animation.
fig = go.Figure(
    data = [scatter_trace_1, scatter_trace_2, _boundary_trace(theta_list[0])],
    layout = go.Layout(updatemenus=[{
        'buttons': [{
            "args": [None,{"fromcurrent": True,
                           "transition": {"duration": 50,
                                          "easing": "linear"}}],
            'label': 'Play',
            'method': 'animate'
        },
        {
            'args': [[None],{'frame': {'duration': 0, 'redraw': False},
                             'mode': 'immediate',
                             'transition': {'duration': 0}}],
            'label': 'Pause',
            'method': 'animate'
        }],
        'direction': 'left',
        'pad': {'r': 10, 't': 87},
        'showactive': False,
        'type': 'buttons',
        'x': 0.1,
        'xanchor': 'right',
        'y': 0,
        'yanchor': 'top'
    }]),
    frames = frames
)
fig.update_layout(
    width = 800,
    height = 800,
    title_text = "Visualising the convergence of decision boundary",
    xaxis = dict(
        range = [-3.5,7.5],
        title = dict(
            text = "Feature_1"
        )
    ),
    yaxis = dict(
        title = dict(
            text = "Feature_2"
        )
    )
)
fig['layout']['sliders'] = [sliders_dict]
fig.show()
# fig.write_html(r".\expected outputs\expectedoutput4.html")

Plotting the decision boundary

In [11]:
# 800x800 canvas; the x-axis is fixed to the span over which the boundary
# line is drawn.
fig = go.Figure(
    layout = dict(
        width = 800,
        height = 800,
        title_text = "Visualization of the decision boundary",
        xaxis = dict(
            range = [-4,8],
            title = dict(
                text = "Feature_1"
            )
        ),
        yaxis = dict(
            title = dict(
                text = "Feature_2"
            )
        )
    )
)

fig.add_trace(scatter_trace_1)
fig.add_trace(scatter_trace_2)

# Decision boundary: theta[0] + theta[1]*Feature_1 + theta[2]*Feature_2 = 0.
# Feature_2 is solved for at the same Feature_1 values that are plotted on the
# x-axis. Previously x used linspace(-3, 7) while y was computed from
# linspace(-4, 8), which drew a line with the wrong slope.
boundary_x = np.linspace(-4,8,2)
fig.add_trace(go.Scatter(
    x = boundary_x,
    y = -1*(theta[0]+boundary_x*theta[1])/theta[2],
    mode = "lines",
    name = "Decision Boundry",
    hoverinfo = "none"
))

fig.show()
# fig.write_html(r".\expected outputs\expectedoutput5.html")